{ "cells": [
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Chapter 5 task code: data preprocessing with pandas\n", "\n", "Combines, de-duplicates, cleans and standardises the meal-order tables. Cells are meant to be run top to bottom on a fresh kernel." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "from scipy.interpolate import interp1d, lagrange, make_interp_spline\n", "from sqlalchemy import create_engine" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Folder that holds ./data; set CH5_WORK_DIR to run elsewhere without editing the notebook.\n", "WORK_DIR = os.environ.get('CH5_WORK_DIR', r'H:\\平台资源配套\\4 Python数据分析与应用\\01-数据和代码\\第5章\\01-任务程序')\n", "os.chdir(WORK_DIR)\n", "\n", "# The fallback is the local demo account only; set MEAL_DB_URL so real credentials stay out of the notebook.\n", "DB_URL = os.environ.get('MEAL_DB_URL', 'mysql+pymysql://root:123456@127.0.0.1:3306/test?charset=utf8')\n", "engine = create_engine(DB_URL)\n", "engin = engine  # original spelling kept as an alias" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Load the order-detail table" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = pd.read_sql('meal_order_detail1', con=engine)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Stack tables with `pd.concat`" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "left_cols = data.iloc[:, :10]\n", "right_cols = data.iloc[:, 10:]\n", "pd.concat([left_cols, right_cols], axis=1, join='inner').shape" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.shape" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "top_rows = data.iloc[:100, :]\n", "rest_rows = data.iloc[100:, :]\n", "pd.concat([top_rows, rest_rows]).shape" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to stack rows.\n", "pd.concat([top_rows, rest_rows], ignore_index=True).shape" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Join on a key with `merge` / `join`" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "order = pd.read_csv('./data/meal_order_info.csv', encoding='gbk')\n", "order.head()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "order['info_id'].dtype" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data['order_id'].dtype" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Both key columns must share a dtype before they can be matched.\n", "order['info_id'] = order['info_id'].astype(str)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "order['info_id'].dtype" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.merge(order, data, left_on='info_id', right_on='order_id').shape" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "order.shape" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.shape" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.merge(order, data, left_on='info_id', right_on='order_id').head()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# DataFrame.join matches `on` against the other frame's index, so the key is renamed\n", "# as a column (columns=...) and moved into the index. `order` itself is left untouched.\n", "order_by_id = order.rename(columns={'info_id': 'order_id'}).set_index('order_id')\n", "data.join(order_by_id, on='order_id', rsuffix='1').shape" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dis1 = {'id': list(range(1, 10)),\n", "        'cpu': ['i7', 'i5', np.nan, 'i7', 'i7', 'i5', np.nan, np.nan, 'i5']}\n", "cpu_first = pd.DataFrame(dis1)\n", "\n", "dis2 = {'id': list(range(1, 10)),\n", "        'cpu': ['i7', 'i5', 'i5', np.nan, 'i7', 'i5', 'i5', np.nan, 'i5']}\n", "cpu_second = pd.DataFrame(dis2)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cpu_first.combine_first(cpu_second)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Duplicates and feature similarity" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data['dishes_name'].drop_duplicates()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.drop_duplicates().shape" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.drop_duplicates(subset=['order_id', 'emp_id']).shape" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data[['counts', 'amounts']].corr()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Correlation only exists for numeric columns; selecting them explicitly behaves the same on\n", "# every pandas version (pandas >= 2.0 raises on the text column instead of skipping it).\n", "data[['counts', 'amounts', 'dishes_name']].select_dtypes(include='number').corr()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Pairwise check: is column i element-for-element identical to column j?\n", "cols = ['counts', 'amounts', 'dishes_name']\n", "sim_dis = pd.DataFrame([[data[i].equals(data[j]) for j in cols] for i in cols],\n", "                       index=cols, columns=cols)\n", "sim_dis" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.describe()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Missing values" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Show both counts side by side; a cell only displays its last expression.\n", "null_summary = pd.concat([data.isnull().sum(), data.notnull().sum()], axis=1, keys=['null', 'not_null'])\n", "null_summary" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data['logicprn_name']" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dis1 = {'id': list(range(1, 10)),\n", "        'cpu': ['i7', 'i5', np.nan, 'i7', 'i7', 'i5', np.nan, np.nan, 'i5']}\n", "cpu_df = pd.DataFrame(dis1)\n", "cpu_df.isnull()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Drop every row that has any missing value.\n", "cpu_df.dropna()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Drop rows only when 'id' is missing.\n", "cpu_df.dropna(subset=['id'])" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Drop every column that has any missing value.\n", "cpu_df.dropna(axis=1)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Fill with the most frequent value (first entry of value_counts).\n", "cpu_df['cpu'].fillna(cpu_df['cpu'].value_counts().index[0])" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cpu_df['cpu'].value_counts()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dis1 = {'id': list(range(1, 10)),\n", "        'cpu': [7, 7, np.nan, 5, 4, 6, np.nan, np.nan, 7]}\n", "cpu_num = pd.DataFrame(dis1)\n", "cpu_num['cpu'].fillna(cpu_num['cpu'].mean())" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Interpolation" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x = np.array([1, 2, 3, 6, 7])\n", "y = np.array([3, 5, 6, 9, 13])\n", "model = interp1d(x, y, kind='linear')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model([4, 5])" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig, ax = plt.subplots()\n", "ax.scatter(x, y, label='samples')\n", "ax.plot(x, model(x), 'r-', label='linear interp1d')\n", "ax.set(xlabel='x', ylabel='y', title='Linear interpolation')\n", "ax.legend()\n", "plt.show()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "f_lag = lagrange(x, y)\n", "f_lag([4, 5])" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x_new = [4, 5]\n", "# Evaluate on a dense grid: drawn only at the sample nodes, the polynomial would look\n", "# exactly like the piecewise-linear line.\n", "x_dense = np.linspace(x.min(), x.max(), 200)\n", "\n", "fig, ax = plt.subplots()\n", "ax.scatter(x, y, label='samples')\n", "ax.scatter(x_new, model(x_new), label='linear estimate')\n", "ax.scatter(x_new, f_lag(x_new), label='Lagrange estimate')\n", "ax.plot(x_dense, model(x_dense), 'r-', label='linear')\n", "ax.plot(x_dense, f_lag(x_dense), 'g-', label='Lagrange')\n", "ax.set(xlabel='x', ylabel='y', title='Linear vs. Lagrange interpolation')\n", "ax.legend()\n", "plt.show()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# BSpline(t, c, k) expects knots and coefficients, not samples; make_interp_spline builds\n", "# the interpolating B-spline from the (x, y) points (k=1 is piecewise linear).\n", "y_bs = make_interp_spline(x, y, k=1)\n", "y_bs([4, 5])" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Outliers" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data['counts']" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "counts = data['counts']\n", "u = counts.mean()\n", "o = counts.std()\n", "# 3-sigma rule as one vectorised mask instead of a per-row Python lambda.\n", "ind = (counts > u + 3 * o) | (counts < u - 3 * o)\n", "data.loc[ind, 'counts']" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig, ax = plt.subplots()\n", "box_parts = ax.boxplot(data['counts'])\n", "ax.set(ylabel='counts', title='Box plot of counts')\n", "plt.show()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# y-values of the points drawn beyond the whiskers.\n", "box_parts['fliers'][0].get_ydata()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Standardising numeric features" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
detail_idorder_iddishes_idlogicprn_nameparent_class_namedishes_nameitemis_addcountsamountscostplace_order_timediscount_amtdiscount_reasonkick_backadd_inpriceadd_infobar_codepicture_fileemp_id
02956417610062NaNNaN蒜蓉生蚝0149NaN2016/8/111:05:00NaNNaNNaN0NaNNaNcaipu/104001.jpg1442
12958417609957NaNNaN蒙古烤羊腿0148NaN2016/8/111:07:00NaNNaNNaN0NaNNaNcaipu/202003.jpg1442
22961417609950NaNNaN大蒜苋菜0130NaN2016/8/111:07:00NaNNaNNaN0NaNNaNcaipu/303001.jpg1442
32966417610038NaNNaN芝麻烤紫菜0125NaN2016/8/111:11:00NaNNaNNaN0NaNNaNcaipu/105002.jpg1442
42968417610003NaNNaN蒜香包0113NaN2016/8/111:11:00NaNNaNNaN0NaNNaNcaipu/503002.jpg1442
\n", "
" ], "text/plain": [ " detail_id order_id dishes_id logicprn_name parent_class_name \\\n", "0 2956 417 610062 NaN NaN \n", "1 2958 417 609957 NaN NaN \n", "2 2961 417 609950 NaN NaN \n", "3 2966 417 610038 NaN NaN \n", "4 2968 417 610003 NaN NaN \n", "\n", " dishes_name itemis_add counts amounts cost place_order_time \\\n", "0 蒜蓉生蚝 0 1 49 NaN 2016/8/111:05:00 \n", "1 蒙古烤羊腿 0 1 48 NaN 2016/8/111:07:00 \n", "2 大蒜苋菜 0 1 30 NaN 2016/8/111:07:00 \n", "3 芝麻烤紫菜 0 1 25 NaN 2016/8/111:11:00 \n", "4 蒜香包 0 1 13 NaN 2016/8/111:11:00 \n", "\n", " discount_amt discount_reason kick_back add_inprice add_info bar_code \\\n", "0 NaN NaN NaN 0 NaN NaN \n", "1 NaN NaN NaN 0 NaN NaN \n", "2 NaN NaN NaN 0 NaN NaN \n", "3 NaN NaN NaN 0 NaN NaN \n", "4 NaN NaN NaN 0 NaN NaN \n", "\n", " picture_file emp_id \n", "0 caipu/104001.jpg 1442 \n", "1 caipu/202003.jpg 1442 \n", "2 caipu/303001.jpg 1442 \n", "3 caipu/105002.jpg 1442 \n", "4 caipu/503002.jpg 1442 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "# Reload the order-detail table from its GBK-encoded CSV export.\n", "detail_csv = './data/detail.csv'\n", "data = pd.read_csv(detail_csv, encoding='gbk')\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countsamounts
0149
1148
2130
3125
4113
5188
6155
7188
8148
9132
10110
111175
121109
13116
14155
15188
16133
17169
18135
19135
20120
21139
22148
23135
24139
25137
26148
27158
28139
29136
.........
10007113
100081108
10009130
10010113
10011189
10012119
10013119
10014120
10015166
10016135
1001719
1001817
1001916
10020118
10021119
10022130
10023133
1002417
10025116
10026119
10027130
10028133
10029156
10030126
10031129
10032135
10033136
10034139
1003517
10036127
\n", "

10037 rows × 2 columns

\n", "
" ], "text/plain": [ " counts amounts\n", "0 1 49\n", "1 1 48\n", "2 1 30\n", "3 1 25\n", "4 1 13\n", "5 1 88\n", "6 1 55\n", "7 1 88\n", "8 1 48\n", "9 1 32\n", "10 1 10\n", "11 1 175\n", "12 1 109\n", "13 1 16\n", "14 1 55\n", "15 1 88\n", "16 1 33\n", "17 1 69\n", "18 1 35\n", "19 1 35\n", "20 1 20\n", "21 1 39\n", "22 1 48\n", "23 1 35\n", "24 1 39\n", "25 1 37\n", "26 1 48\n", "27 1 58\n", "28 1 39\n", "29 1 36\n", "... ... ...\n", "10007 1 13\n", "10008 1 108\n", "10009 1 30\n", "10010 1 13\n", "10011 1 89\n", "10012 1 19\n", "10013 1 19\n", "10014 1 20\n", "10015 1 66\n", "10016 1 35\n", "10017 1 9\n", "10018 1 7\n", "10019 1 6\n", "10020 1 18\n", "10021 1 19\n", "10022 1 30\n", "10023 1 33\n", "10024 1 7\n", "10025 1 16\n", "10026 1 19\n", "10027 1 30\n", "10028 1 33\n", "10029 1 56\n", "10030 1 26\n", "10031 1 29\n", "10032 1 35\n", "10033 1 36\n", "10034 1 39\n", "10035 1 7\n", "10036 1 27\n", "\n", "[10037 rows x 2 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The two numeric columns that the scaling cells below work on.\n", "data.loc[:, ['counts', 'amounts']]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countsamounts
00.00.271186
10.00.265537
20.00.163842
30.00.135593
40.00.067797
50.00.491525
60.00.305085
70.00.491525
80.00.265537
90.00.175141
100.00.050847
110.00.983051
120.00.610169
130.00.084746
140.00.305085
150.00.491525
160.00.180791
170.00.384181
180.00.192090
190.00.192090
200.00.107345
210.00.214689
220.00.265537
230.00.192090
240.00.214689
250.00.203390
260.00.265537
270.00.322034
280.00.214689
290.00.197740
.........
100070.00.067797
100080.00.604520
100090.00.163842
100100.00.067797
100110.00.497175
100120.00.101695
100130.00.101695
100140.00.107345
100150.00.367232
100160.00.192090
100170.00.045198
100180.00.033898
100190.00.028249
100200.00.096045
100210.00.101695
100220.00.163842
100230.00.180791
100240.00.033898
100250.00.084746
100260.00.101695
100270.00.163842
100280.00.180791
100290.00.310734
100300.00.141243
100310.00.158192
100320.00.192090
100330.00.197740
100340.00.214689
100350.00.033898
100360.00.146893
\n", "

10037 rows × 2 columns

\n", "
" ], "text/plain": [ " counts amounts\n", "0 0.0 0.271186\n", "1 0.0 0.265537\n", "2 0.0 0.163842\n", "3 0.0 0.135593\n", "4 0.0 0.067797\n", "5 0.0 0.491525\n", "6 0.0 0.305085\n", "7 0.0 0.491525\n", "8 0.0 0.265537\n", "9 0.0 0.175141\n", "10 0.0 0.050847\n", "11 0.0 0.983051\n", "12 0.0 0.610169\n", "13 0.0 0.084746\n", "14 0.0 0.305085\n", "15 0.0 0.491525\n", "16 0.0 0.180791\n", "17 0.0 0.384181\n", "18 0.0 0.192090\n", "19 0.0 0.192090\n", "20 0.0 0.107345\n", "21 0.0 0.214689\n", "22 0.0 0.265537\n", "23 0.0 0.192090\n", "24 0.0 0.214689\n", "25 0.0 0.203390\n", "26 0.0 0.265537\n", "27 0.0 0.322034\n", "28 0.0 0.214689\n", "29 0.0 0.197740\n", "... ... ...\n", "10007 0.0 0.067797\n", "10008 0.0 0.604520\n", "10009 0.0 0.163842\n", "10010 0.0 0.067797\n", "10011 0.0 0.497175\n", "10012 0.0 0.101695\n", "10013 0.0 0.101695\n", "10014 0.0 0.107345\n", "10015 0.0 0.367232\n", "10016 0.0 0.192090\n", "10017 0.0 0.045198\n", "10018 0.0 0.033898\n", "10019 0.0 0.028249\n", "10020 0.0 0.096045\n", "10021 0.0 0.101695\n", "10022 0.0 0.163842\n", "10023 0.0 0.180791\n", "10024 0.0 0.033898\n", "10025 0.0 0.084746\n", "10026 0.0 0.101695\n", "10027 0.0 0.163842\n", "10028 0.0 0.180791\n", "10029 0.0 0.310734\n", "10030 0.0 0.141243\n", "10031 0.0 0.158192\n", "10032 0.0 0.192090\n", "10033 0.0 0.197740\n", "10034 0.0 0.214689\n", "10035 0.0 0.033898\n", "10036 0.0 0.146893\n", "\n", "[10037 rows x 2 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def MinMaxScale(data):\n", "    \"\"\"Rescale values linearly so the minimum maps to 0 and the maximum to 1.\n", "\n", "    Parameters\n", "    ----------\n", "    data : pandas.Series or pandas.DataFrame\n", "        Numeric values to rescale.\n", "\n", "    Returns\n", "    -------\n", "    Same type as ``data``: ``(data - min) / (max - min)``. Constant input\n", "    (max == min) comes back as all zeros instead of the NaN that 0/0 gives.\n", "    \"\"\"\n", "    low = data.min()\n", "    span = data.max() - low\n", "    # Swap a zero span for 1 so constant input divides cleanly.\n", "    span = span.where(span != 0, 1) if hasattr(span, 'where') else (span or 1)\n", "    return (data - low) / span\n", "\n", "a = MinMaxScale(data['counts'])\n", "b = MinMaxScale(data['amounts'])\n", "pd.concat([a, b], axis=1)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countsamounts
0-0.1775710.116671
1-0.1775710.088751
2-0.177571-0.413826
3-0.177571-0.553431
4-0.177571-0.888482
5-0.1775711.205587
6-0.1775710.284197
7-0.1775711.205587
8-0.1775710.088751
9-0.177571-0.357984
10-0.177571-0.972245
11-0.1775713.634708
12-0.1775711.791927
13-0.177571-0.804719
14-0.1775710.284197
15-0.1775711.205587
16-0.177571-0.330063
17-0.1775710.675090
18-0.177571-0.274221
19-0.177571-0.274221
20-0.177571-0.693035
21-0.177571-0.162538
22-0.1775710.088751
23-0.177571-0.274221
24-0.177571-0.162538
25-0.177571-0.218380
26-0.1775710.088751
27-0.1775710.367960
28-0.177571-0.162538
29-0.177571-0.246301
.........
10007-0.177571-0.888482
10008-0.1775711.764006
10009-0.177571-0.413826
10010-0.177571-0.888482
10011-0.1775711.233508
10012-0.177571-0.720956
10013-0.177571-0.720956
10014-0.177571-0.693035
10015-0.1775710.591327
10016-0.177571-0.274221
10017-0.177571-1.000165
10018-0.177571-1.056007
10019-0.177571-1.083928
10020-0.177571-0.748877
10021-0.177571-0.720956
10022-0.177571-0.413826
10023-0.177571-0.330063
10024-0.177571-1.056007
10025-0.177571-0.804719
10026-0.177571-0.720956
10027-0.177571-0.413826
10028-0.177571-0.330063
10029-0.1775710.312118
10030-0.177571-0.525510
10031-0.177571-0.441747
10032-0.177571-0.274221
10033-0.177571-0.246301
10034-0.177571-0.162538
10035-0.177571-1.056007
10036-0.177571-0.497589
\n", "

10037 rows × 2 columns

\n", "
" ], "text/plain": [ " counts amounts\n", "0 -0.177571 0.116671\n", "1 -0.177571 0.088751\n", "2 -0.177571 -0.413826\n", "3 -0.177571 -0.553431\n", "4 -0.177571 -0.888482\n", "5 -0.177571 1.205587\n", "6 -0.177571 0.284197\n", "7 -0.177571 1.205587\n", "8 -0.177571 0.088751\n", "9 -0.177571 -0.357984\n", "10 -0.177571 -0.972245\n", "11 -0.177571 3.634708\n", "12 -0.177571 1.791927\n", "13 -0.177571 -0.804719\n", "14 -0.177571 0.284197\n", "15 -0.177571 1.205587\n", "16 -0.177571 -0.330063\n", "17 -0.177571 0.675090\n", "18 -0.177571 -0.274221\n", "19 -0.177571 -0.274221\n", "20 -0.177571 -0.693035\n", "21 -0.177571 -0.162538\n", "22 -0.177571 0.088751\n", "23 -0.177571 -0.274221\n", "24 -0.177571 -0.162538\n", "25 -0.177571 -0.218380\n", "26 -0.177571 0.088751\n", "27 -0.177571 0.367960\n", "28 -0.177571 -0.162538\n", "29 -0.177571 -0.246301\n", "... ... ...\n", "10007 -0.177571 -0.888482\n", "10008 -0.177571 1.764006\n", "10009 -0.177571 -0.413826\n", "10010 -0.177571 -0.888482\n", "10011 -0.177571 1.233508\n", "10012 -0.177571 -0.720956\n", "10013 -0.177571 -0.720956\n", "10014 -0.177571 -0.693035\n", "10015 -0.177571 0.591327\n", "10016 -0.177571 -0.274221\n", "10017 -0.177571 -1.000165\n", "10018 -0.177571 -1.056007\n", "10019 -0.177571 -1.083928\n", "10020 -0.177571 -0.748877\n", "10021 -0.177571 -0.720956\n", "10022 -0.177571 -0.413826\n", "10023 -0.177571 -0.330063\n", "10024 -0.177571 -1.056007\n", "10025 -0.177571 -0.804719\n", "10026 -0.177571 -0.720956\n", "10027 -0.177571 -0.413826\n", "10028 -0.177571 -0.330063\n", "10029 -0.177571 0.312118\n", "10030 -0.177571 -0.525510\n", "10031 -0.177571 -0.441747\n", "10032 -0.177571 -0.274221\n", "10033 -0.177571 -0.246301\n", "10034 -0.177571 -0.162538\n", "10035 -0.177571 -1.056007\n", "10036 -0.177571 -0.497589\n", "\n", "[10037 rows x 2 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def StandScale(data):\n", "    \"\"\"Standardise values to zero mean and unit standard deviation (z-score).\n", "\n", "    Parameters\n", "    ----------\n", "    data : pandas.Series or pandas.DataFrame\n", "        Numeric values to standardise.\n", "\n", "    Returns\n", "    -------\n", "    Same type as ``data``: ``(data - mean) / std`` using ``data.std()``.\n", "    Constant input (std == 0) comes back as all zeros instead of NaN.\n", "    \"\"\"\n", "    spread = data.std()\n", "    # Swap a zero standard deviation for 1 so constant input divides cleanly.\n", "    spread = spread.where(spread != 0, 1) if hasattr(spread, 'where') else (spread or 1)\n", "    return (data - data.mean()) / spread\n", "\n", "a = StandScale(data['counts'])\n", "b = StandScale(data['amounts'])\n", "pd.concat([a, b], axis=1)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countsamounts
00.10.049
10.10.048
20.10.030
30.10.025
40.10.013
50.10.088
60.10.055
70.10.088
80.10.048
90.10.032
100.10.010
110.10.175
120.10.109
130.10.016
140.10.055
150.10.088
160.10.033
170.10.069
180.10.035
190.10.035
200.10.020
210.10.039
220.10.048
230.10.035
240.10.039
250.10.037
260.10.048
270.10.058
280.10.039
290.10.036
.........
100070.10.013
100080.10.108
100090.10.030
100100.10.013
100110.10.089
100120.10.019
100130.10.019
100140.10.020
100150.10.066
100160.10.035
100170.10.009
100180.10.007
100190.10.006
100200.10.018
100210.10.019
100220.10.030
100230.10.033
100240.10.007
100250.10.016
100260.10.019
100270.10.030
100280.10.033
100290.10.056
100300.10.026
100310.10.029
100320.10.035
100330.10.036
100340.10.039
100350.10.007
100360.10.027
\n", "

10037 rows × 2 columns

\n", "
" ], "text/plain": [ " counts amounts\n", "0 0.1 0.049\n", "1 0.1 0.048\n", "2 0.1 0.030\n", "3 0.1 0.025\n", "4 0.1 0.013\n", "5 0.1 0.088\n", "6 0.1 0.055\n", "7 0.1 0.088\n", "8 0.1 0.048\n", "9 0.1 0.032\n", "10 0.1 0.010\n", "11 0.1 0.175\n", "12 0.1 0.109\n", "13 0.1 0.016\n", "14 0.1 0.055\n", "15 0.1 0.088\n", "16 0.1 0.033\n", "17 0.1 0.069\n", "18 0.1 0.035\n", "19 0.1 0.035\n", "20 0.1 0.020\n", "21 0.1 0.039\n", "22 0.1 0.048\n", "23 0.1 0.035\n", "24 0.1 0.039\n", "25 0.1 0.037\n", "26 0.1 0.048\n", "27 0.1 0.058\n", "28 0.1 0.039\n", "29 0.1 0.036\n", "... ... ...\n", "10007 0.1 0.013\n", "10008 0.1 0.108\n", "10009 0.1 0.030\n", "10010 0.1 0.013\n", "10011 0.1 0.089\n", "10012 0.1 0.019\n", "10013 0.1 0.019\n", "10014 0.1 0.020\n", "10015 0.1 0.066\n", "10016 0.1 0.035\n", "10017 0.1 0.009\n", "10018 0.1 0.007\n", "10019 0.1 0.006\n", "10020 0.1 0.018\n", "10021 0.1 0.019\n", "10022 0.1 0.030\n", "10023 0.1 0.033\n", "10024 0.1 0.007\n", "10025 0.1 0.016\n", "10026 0.1 0.019\n", "10027 0.1 0.030\n", "10028 0.1 0.033\n", "10029 0.1 0.056\n", "10030 0.1 0.026\n", "10031 0.1 0.029\n", "10032 0.1 0.035\n", "10033 0.1 0.036\n", "10034 0.1 0.039\n", "10035 0.1 0.007\n", "10036 0.1 0.027\n", "\n", "[10037 rows x 2 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "def DecimalScale(data):\n", "    \"\"\"Scale values by a power of ten so every magnitude is at most 1.\n", "\n", "    Parameters\n", "    ----------\n", "    data : pandas.Series or pandas.DataFrame\n", "        Numeric values to scale.\n", "\n", "    Returns\n", "    -------\n", "    Same type as ``data``: ``data / 10 ** ceil(log10(max(|data|)))``. With this\n", "    ceil rule a maximum that is an exact power of ten maps to exactly 1.0.\n", "    All-zero input is returned unscaled (divided by 1) instead of as NaN.\n", "    \"\"\"\n", "    peak = data.abs().max()\n", "    # log10(0) is -inf and would turn the divisor into 0; use 1 so the divisor is 10 ** 0.\n", "    peak = peak.where(peak != 0, 1) if hasattr(peak, 'where') else (peak or 1)\n", "    return data / 10 ** np.ceil(np.log10(peak))\n", "\n", "a = DecimalScale(data['counts'])\n", "b = DecimalScale(data['amounts'])\n", "pd.concat([a, b], axis=1)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
38度剑南春42度海之蓝50度古井贡酒52度泸州老窖53度茅台一品香酥藕三丝鳝鱼三色凉拌手撕兔不加一滴油的酸奶蛋糕五彩藕苗...香辣腐乳炒虾香酥两吃大虾鱼香肉丝拌面鲜美鳝鱼鸡蛋、肉末肠粉麻辣小龙虾黄尾袋鼠西拉子红葡萄酒黄油曲奇饼干黄花菜炒木耳黑米恋上葡萄
00000000000...0000000000
10000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000000
50000000000...0000000000
60000000000...0000000000
70000000000...0000000000
80000000000...0000000000
90000000000...0000000000
100000000000...0000000000
110000000000...0000000000
120000000000...0000000000
130000000000...0000000000
140000000000...0000000000
150000000000...0000000000
160000000000...0000000000
170000000000...0000000000
180000000000...0000000000
190000000000...0000000000
200000000000...0000000000
210000000000...0000000000
220000000000...0000000000
230000000000...0000000000
240000000000...0000000000
250000000000...0000000000
260000000000...0000000000
270000000000...0000000000
280000000000...0000000000
290000000000...0000000000
..................................................................
100070000000000...0000000000
100080000000000...0000000000
100090000000000...0000000000
100100000000000...0000000000
100110000000000...0100000000
100120000000000...0000000000
100130000000000...0000000000
100140000000000...0000000000
100150000000100...0000000000
100160000000000...0000000000
100170000000000...0000000000
100180000000000...0000000000
100190000000000...0000000000
100200000000000...0010000000
100210000000000...0000000000
100220000000000...0000000000
100230000000000...0000000000
100240000000000...0000000000
100250000000000...0000000000
100260000000000...0000000000
100270000000000...0000000000
100280000000000...0000000001
100290000000000...0000000000
100300000000000...0000000000
100310000000000...0000000000
100320000000000...0000000000
100330000000000...0000000000
100340000000000...0000000000
100350000000010...0000000000
100360000000000...0000000000
\n", "

10037 rows × 145 columns

\n", "
" ], "text/plain": [ " 38度剑南春 42度海之蓝 50度古井贡酒 52度泸州老窖 53度茅台 一品香酥藕 三丝鳝鱼 三色凉拌手撕兔 \\\n", "0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 \n", "5 0 0 0 0 0 0 0 0 \n", "6 0 0 0 0 0 0 0 0 \n", "7 0 0 0 0 0 0 0 0 \n", "8 0 0 0 0 0 0 0 0 \n", "9 0 0 0 0 0 0 0 0 \n", "10 0 0 0 0 0 0 0 0 \n", "11 0 0 0 0 0 0 0 0 \n", "12 0 0 0 0 0 0 0 0 \n", "13 0 0 0 0 0 0 0 0 \n", "14 0 0 0 0 0 0 0 0 \n", "15 0 0 0 0 0 0 0 0 \n", "16 0 0 0 0 0 0 0 0 \n", "17 0 0 0 0 0 0 0 0 \n", "18 0 0 0 0 0 0 0 0 \n", "19 0 0 0 0 0 0 0 0 \n", "20 0 0 0 0 0 0 0 0 \n", "21 0 0 0 0 0 0 0 0 \n", "22 0 0 0 0 0 0 0 0 \n", "23 0 0 0 0 0 0 0 0 \n", "24 0 0 0 0 0 0 0 0 \n", "25 0 0 0 0 0 0 0 0 \n", "26 0 0 0 0 0 0 0 0 \n", "27 0 0 0 0 0 0 0 0 \n", "28 0 0 0 0 0 0 0 0 \n", "29 0 0 0 0 0 0 0 0 \n", "... ... ... ... ... ... ... ... ... \n", "10007 0 0 0 0 0 0 0 0 \n", "10008 0 0 0 0 0 0 0 0 \n", "10009 0 0 0 0 0 0 0 0 \n", "10010 0 0 0 0 0 0 0 0 \n", "10011 0 0 0 0 0 0 0 0 \n", "10012 0 0 0 0 0 0 0 0 \n", "10013 0 0 0 0 0 0 0 0 \n", "10014 0 0 0 0 0 0 0 0 \n", "10015 0 0 0 0 0 0 0 1 \n", "10016 0 0 0 0 0 0 0 0 \n", "10017 0 0 0 0 0 0 0 0 \n", "10018 0 0 0 0 0 0 0 0 \n", "10019 0 0 0 0 0 0 0 0 \n", "10020 0 0 0 0 0 0 0 0 \n", "10021 0 0 0 0 0 0 0 0 \n", "10022 0 0 0 0 0 0 0 0 \n", "10023 0 0 0 0 0 0 0 0 \n", "10024 0 0 0 0 0 0 0 0 \n", "10025 0 0 0 0 0 0 0 0 \n", "10026 0 0 0 0 0 0 0 0 \n", "10027 0 0 0 0 0 0 0 0 \n", "10028 0 0 0 0 0 0 0 0 \n", "10029 0 0 0 0 0 0 0 0 \n", "10030 0 0 0 0 0 0 0 0 \n", "10031 0 0 0 0 0 0 0 0 \n", "10032 0 0 0 0 0 0 0 0 \n", "10033 0 0 0 0 0 0 0 0 \n", "10034 0 0 0 0 0 0 0 0 \n", "10035 0 0 0 0 0 0 0 0 \n", "10036 0 0 0 0 0 0 0 0 \n", "\n", " 不加一滴油的酸奶蛋糕 五彩藕苗 ... 香辣腐乳炒虾 香酥两吃大虾 鱼香肉丝拌面 鲜美鳝鱼 鸡蛋、肉末肠粉 麻辣小龙虾 \\\n", "0 0 0 ... 0 0 0 0 0 0 \n", "1 0 0 ... 0 0 0 0 0 0 \n", "2 0 0 ... 0 0 0 0 0 0 \n", "3 0 0 ... 0 0 0 0 0 0 \n", "4 0 0 ... 0 0 0 0 0 0 \n", "5 0 0 ... 0 0 0 0 0 0 \n", "6 0 0 ... 0 0 0 0 0 0 \n", "7 0 0 ... 
0 0 0 0 0 0 \n", "8 0 0 ... 0 0 0 0 0 0 \n", "9 0 0 ... 0 0 0 0 0 0 \n", "10 0 0 ... 0 0 0 0 0 0 \n", "11 0 0 ... 0 0 0 0 0 0 \n", "12 0 0 ... 0 0 0 0 0 0 \n", "13 0 0 ... 0 0 0 0 0 0 \n", "14 0 0 ... 0 0 0 0 0 0 \n", "15 0 0 ... 0 0 0 0 0 0 \n", "16 0 0 ... 0 0 0 0 0 0 \n", "17 0 0 ... 0 0 0 0 0 0 \n", "18 0 0 ... 0 0 0 0 0 0 \n", "19 0 0 ... 0 0 0 0 0 0 \n", "20 0 0 ... 0 0 0 0 0 0 \n", "21 0 0 ... 0 0 0 0 0 0 \n", "22 0 0 ... 0 0 0 0 0 0 \n", "23 0 0 ... 0 0 0 0 0 0 \n", "24 0 0 ... 0 0 0 0 0 0 \n", "25 0 0 ... 0 0 0 0 0 0 \n", "26 0 0 ... 0 0 0 0 0 0 \n", "27 0 0 ... 0 0 0 0 0 0 \n", "28 0 0 ... 0 0 0 0 0 0 \n", "29 0 0 ... 0 0 0 0 0 0 \n", "... ... ... ... ... ... ... ... ... ... \n", "10007 0 0 ... 0 0 0 0 0 0 \n", "10008 0 0 ... 0 0 0 0 0 0 \n", "10009 0 0 ... 0 0 0 0 0 0 \n", "10010 0 0 ... 0 0 0 0 0 0 \n", "10011 0 0 ... 0 1 0 0 0 0 \n", "10012 0 0 ... 0 0 0 0 0 0 \n", "10013 0 0 ... 0 0 0 0 0 0 \n", "10014 0 0 ... 0 0 0 0 0 0 \n", "10015 0 0 ... 0 0 0 0 0 0 \n", "10016 0 0 ... 0 0 0 0 0 0 \n", "10017 0 0 ... 0 0 0 0 0 0 \n", "10018 0 0 ... 0 0 0 0 0 0 \n", "10019 0 0 ... 0 0 0 0 0 0 \n", "10020 0 0 ... 0 0 1 0 0 0 \n", "10021 0 0 ... 0 0 0 0 0 0 \n", "10022 0 0 ... 0 0 0 0 0 0 \n", "10023 0 0 ... 0 0 0 0 0 0 \n", "10024 0 0 ... 0 0 0 0 0 0 \n", "10025 0 0 ... 0 0 0 0 0 0 \n", "10026 0 0 ... 0 0 0 0 0 0 \n", "10027 0 0 ... 0 0 0 0 0 0 \n", "10028 0 0 ... 0 0 0 0 0 0 \n", "10029 0 0 ... 0 0 0 0 0 0 \n", "10030 0 0 ... 0 0 0 0 0 0 \n", "10031 0 0 ... 0 0 0 0 0 0 \n", "10032 0 0 ... 0 0 0 0 0 0 \n", "10033 0 0 ... 0 0 0 0 0 0 \n", "10034 0 0 ... 0 0 0 0 0 0 \n", "10035 1 0 ... 0 0 0 0 0 0 \n", "10036 0 0 ... 
0 0 0 0 0 0 \n", "\n", " 黄尾袋鼠西拉子红葡萄酒 黄油曲奇饼干 黄花菜炒木耳 黑米恋上葡萄 \n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 0 0 0 0 \n", "5 0 0 0 0 \n", "6 0 0 0 0 \n", "7 0 0 0 0 \n", "8 0 0 0 0 \n", "9 0 0 0 0 \n", "10 0 0 0 0 \n", "11 0 0 0 0 \n", "12 0 0 0 0 \n", "13 0 0 0 0 \n", "14 0 0 0 0 \n", "15 0 0 0 0 \n", "16 0 0 0 0 \n", "17 0 0 0 0 \n", "18 0 0 0 0 \n", "19 0 0 0 0 \n", "20 0 0 0 0 \n", "21 0 0 0 0 \n", "22 0 0 0 0 \n", "23 0 0 0 0 \n", "24 0 0 0 0 \n", "25 0 0 0 0 \n", "26 0 0 0 0 \n", "27 0 0 0 0 \n", "28 0 0 0 0 \n", "29 0 0 0 0 \n", "... ... ... ... ... \n", "10007 0 0 0 0 \n", "10008 0 0 0 0 \n", "10009 0 0 0 0 \n", "10010 0 0 0 0 \n", "10011 0 0 0 0 \n", "10012 0 0 0 0 \n", "10013 0 0 0 0 \n", "10014 0 0 0 0 \n", "10015 0 0 0 0 \n", "10016 0 0 0 0 \n", "10017 0 0 0 0 \n", "10018 0 0 0 0 \n", "10019 0 0 0 0 \n", "10020 0 0 0 0 \n", "10021 0 0 0 0 \n", "10022 0 0 0 0 \n", "10023 0 0 0 0 \n", "10024 0 0 0 0 \n", "10025 0 0 0 0 \n", "10026 0 0 0 0 \n", "10027 0 0 0 0 \n", "10028 0 0 0 1 \n", "10029 0 0 0 0 \n", "10030 0 0 0 0 \n", "10031 0 0 0 0 \n", "10032 0 0 0 0 \n", "10033 0 0 0 0 \n", "10034 0 0 0 0 \n", "10035 0 0 0 0 \n", "10036 0 0 0 0 \n", "\n", "[10037 rows x 145 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.get_dummies(data['dishes_name'])" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.823, 36.4] 5461\n", "(36.4, 71.8] 3157\n", "(71.8, 107.2] 839\n", "(142.6, 178.0] 426\n", "(107.2, 142.6] 154\n", "Name: amounts, dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.cut(data['amounts'], bins=5).value_counts()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(18.0, 32.0] 2107\n", "(39.0, 58.0] 2080\n", "(32.0, 39.0] 1910\n", "(1.0, 18.0] 1891\n", "(58.0, 178.0] 1863\n", "Name: amounts, dtype: 
def samefreq(data, k):
    """Discretise a numeric Series into ``k`` equal-frequency (quantile) bins.

    The bin edges are the 0, 1/k, 2/k, ..., 1 quantiles of ``data``; each value
    is then assigned to the interval it falls in.

    Parameters
    ----------
    data : pandas.Series
        Numeric values to discretise.
    k : int
        Number of bins requested; must be at least 1.

    Returns
    -------
    pandas.Series
        Categorical Series of intervals, aligned with ``data``'s index.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, or (raised by ``pd.cut``) if heavy ties in
        ``data`` make two quantile edges identical.
    """
    if k < 1:
        raise ValueError("k must be a positive integer, got {!r}".format(k))

    # linspace pins both end points to exactly 0.0 and 1.0; a fractional
    # arange step can drift (e.g. 49 * (1/49) != 1.0) and shift the last edge.
    probabilities = np.linspace(0, 1, k + 1)
    edges = data.quantile(probabilities)

    # pd.cut builds right-closed, left-open intervals, so without
    # include_lowest every observation equal to the minimum (the first edge)
    # would fall outside all bins and come back as NaN.
    return pd.cut(data, edges.values, include_lowest=True)